#!/usr/bin/env jython

"""
Reads in a specific text file
Run by: jython text_reader.py <filename> <language-code>
"""

# Fix classpath scanning - otherwise uimaFIT will not find the UIMA types.
# The current thread's context class loader is replaced with the loader
# returned by getSyspathJavaLoader() (pulled in by the star import below).
# NOTE(review): presumably this has to run before any uimaFIT call further
# down in the script - confirm before reordering.
from java.lang import Thread
from org.python.core.imp import *
Thread.currentThread().contextClassLoader = getSyspathJavaLoader()

# Dependencies and imports for DKPro modules.
# require() is handed the coordinates of the DKPro Core text IO module
# (version 1.6.1).
# NOTE(review): presumably jip makes that artifact available so that the
# package import on the next line resolves; keep the require() call ahead
# of the import - confirm against the jip documentation.
from jip.embed import require
require('de.tudarmstadt.ukp.dkpro.core:de.tudarmstadt.ukp.dkpro.core.io.text-asl:1.6.1')
from de.tudarmstadt.ukp.dkpro.core.io.text import *


# uimaFIT imports
from org.apache.uima.fit.util.JCasUtil import *
from org.apache.uima.fit.pipeline.SimplePipeline import *
from org.apache.uima.fit.factory.CollectionReaderFactory import *
from org.apache.uima.fit.factory.AnalysisEngineFactory import *


# Access to commandline arguments
import sys

# Check that all necessary arguments have been passed to the program.
# Both the path (argv[1]) and the language code (argv[2]) are required;
# any additional arguments are ignored.
if len(sys.argv) < 3:
    # Print the module docstring verbatim as the usage message. It is not
    # run through %-formatting, so a literal '%' in the docstring can never
    # break the message. Usage errors go to stderr so they do not get mixed
    # into the document text the script writes to stdout.
    print >> sys.stderr, __doc__
    sys.exit(1)


# Define different parts of the pipeline
documentReader = createReaderDescription(TextReader, 
										 TextReader.PARAM_PATH, sys.argv[1],
										 TextReader.PARAM_LANGUAGE, sys.argv[2],
										 TextReader.PARAM_PATTERNS, "*.txt")

# Assemble and run the pipeline 
pipeline = iteratePipeline(
	documentReader
);

for jcas in pipeline:  
	print jcas.documentText
